#!/bin/sh
#
# Provisioning script for Stratio Sandbox.
# More info at https://github.com/Stratio/sandbox
#
#

# Progress messages use printf rather than `echo -e`: this script runs under
# /bin/sh, where echo options are not defined by POSIX (some shells would
# print a literal "-e").
printf '%s\n' 'Installing Stratio Deep packages...'

##############
## SCALA    ##
##############
printf '%s\n' 'Installing Scala...'
# Install only when rpm reports the package as missing, so that running the
# provisioning again does not reinstall it.
rpm -q stratio-scala || yum -y -q --nogpgcheck install stratio-scala

##############
## SPARK    ##
##############
# printf instead of `echo -e`: echo options are undefined under POSIX sh.
printf '%s\n' 'Installing Spark...'
# Install only when rpm reports the package as missing.
rpm -q stratio-spark || yum -y -q --nogpgcheck install stratio-spark

# First IPv4 address configured on eth0, without the "/prefix" suffix.
# Only the first "inet" line is used: a multi-line value would break the sed
# expressions that interpolate $ipaddress further down in this script.
ipaddress=$(ip -4 addr show dev eth0 | awk '$1 == "inet" { sub(/\/.*/, "", $2); print $2; exit }')
if [ -z "$ipaddress" ]; then
  # Not fatal (the original script did not stop here either), but make the
  # problem visible because the Spark master address will end up empty.
  printf '%s\n' 'Warning: no IPv4 address found on eth0.' >&2
fi
# Create an SSH key pair under /opt/sds/spark/.ssh and authorize it for
# logins to the same account (public key copied to authorized_keys).
# NOTE(review): presumably needed by Spark's start/stop scripts to reach the
# local worker over ssh - confirm.
spark_ssh_dir=/opt/sds/spark/.ssh
# -p: do not fail when the directory is left over from a previous run.
mkdir -p "$spark_ssh_dir"
chmod 700 "$spark_ssh_dir"
# ssh-keygen asks interactively before overwriting an existing key, which
# would stall an unattended re-provisioning; generate the key only once.
if [ ! -f "$spark_ssh_dir/id_rsa" ]; then
  ssh-keygen -t rsa -b 2048 -f "$spark_ssh_dir/id_rsa" -N ""
fi
cp "$spark_ssh_dir/id_rsa.pub" "$spark_ssh_dir/authorized_keys"
chown -R spark:spark "$spark_ssh_dir/"
# Write the SysV init script for Spark to /etc/init.d/spark, make it
# executable and register it with chkconfig (left disabled at this point).
#
# The here-document delimiter is quoted, so the body below is written to the
# file verbatim: nothing in it is expanded by this provisioning script and no
# backslash escaping of $ or backticks is needed.
cat >/etc/init.d/spark <<'EOF'
#!/bin/bash

### BEGIN INIT INFO
# Provides: spark
# Required-Start: $remote_fs
# Required-Stop: $remote_fs
# Should-Stop: $all
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Short-Description: Startup script for SDS Deep
# Description: SDS Deep is
### END INIT INFO

# Developed by pmadrid@stratio.com
# Version: 0.1 2014
# When I learn scripting a bit better, I'll try to improve this one...

NAME=spark
DESC="Spark cluster computing system"
BASEDIR="/opt/sds/spark"
CONFDIR="/etc/sds/spark"
LOGFILE="/var/log/sds/spark/spark.log"
pidDir="/var/run/sds"
serviceUser="root"

# If JAVA_HOME has not been set, try to determine it.
JVM_SEARCH_DIRS="/usr/java/default /usr/java/latest /opt/java"
if [ -z "$JAVA_HOME" ]; then
   # If java is in PATH, use a JAVA_HOME that corresponds to that.
   java="$(/usr/bin/which java 2>/dev/null)"
   if [ -n "$java" ]; then
      # Resolve symlinks, then strip "/bin/java" to get the JVM home.
      java=$(readlink --canonicalize "$java")
      JAVA_HOME=$(dirname "$(dirname "$java")")
   else
      # No JAVA_HOME set and no java found in PATH; search for a JVM.
      for jdir in $JVM_SEARCH_DIRS; do
         if [ -x "$jdir/bin/java" ]; then
            JAVA_HOME="$jdir"
            break
         fi
      done
   fi
fi
if [ -z "$JAVA_HOME" ]; then
   echo "Error: JAVA_HOME is not defined correctly." 1>&2
   echo "No java executable was found in PATH or in: $JVM_SEARCH_DIRS" 1>&2
   exit 1
fi
export JAVA_HOME

# Read configuration variable file if it is present
[ -r /etc/default/$NAME ] && . /etc/default/$NAME

if [ ! -d "$pidDir" ]; then
   mkdir -p "$pidDir" || exit 1
   chgrp sds "$pidDir" && chmod 775 "$pidDir"
fi

# Returns 0 if the process with PID $1 is running.
function checkProcessIsRunning {
   local pid="$1"
   if [ -z "$pid" -o "$pid" == " " ]; then return 1; fi
   if [ ! -e /proc/$pid ]; then return 1; fi
   return 0; }

# Returns 0 if the process with PID $1 is our Java service process, i.e. it is
# a java process whose command line contains $javaCommandLineKeyword.
function checkProcessIsOurService {
   local pid="$1"
   local cmd="$(ps -p $pid --no-headers -o comm)"
   if [ "$cmd" != "java" ]; then return 1; fi
   grep -q --binary -F "$javaCommandLineKeyword" /proc/$pid/cmdline
   if [ $? -ne 0 ]; then return 1; fi
   return 0; }

# Returns 0 when the service is running and sets the variable $servicePid to
# the PID read from $pidFile. $servicePid is intentionally global so callers
# can report it.
function getServicePid {
   servicePid=""
   if [ ! -f "$pidFile" ]; then return 1; fi
   servicePid="$(<"$pidFile")"
   checkProcessIsRunning $servicePid || return 1
   checkProcessIsOurService $servicePid || return 1
   return 0; }

case "$1" in
   start)
      echo "Starting $DESC "
      su -s /bin/bash - $serviceUser -c "$BASEDIR/sbin/start-all.sh" >> "$LOGFILE" 2>&1
      if [ $? -ne 0 ]; then
         echo "Error starting $DESC"
         exit 1
      fi
      ;;
   stop)
      echo "Stopping $DESC "
      su -s /bin/bash - $serviceUser -c "$BASEDIR/sbin/stop-all.sh" >> "$LOGFILE" 2>&1
      if [ $? -ne 0 ]; then
         echo "Error stopping $DESC"
         exit 1
      fi
      ;;
   start-master)
      echo "Starting Spark Master: "
      su -s /bin/bash - $serviceUser -c "$BASEDIR/sbin/start-master.sh" >> "$LOGFILE" 2>&1
      ;;
   stop-master)
      echo "Stopping Spark Master: "
      su -s /bin/bash - $serviceUser -c "$BASEDIR/sbin/stop-master.sh" >> "$LOGFILE" 2>&1
      ;;
   start-slaves)
      echo "Starting Spark Slaves: "
      su -s /bin/bash - $serviceUser -c "$BASEDIR/sbin/start-slaves.sh" >> "$LOGFILE" 2>&1
      ;;
   stop-slaves)
      echo "Stopping Spark Slaves: "
      su -s /bin/bash - $serviceUser -c "$BASEDIR/sbin/stop-slaves.sh" >> "$LOGFILE" 2>&1
      ;;
   reload|restart)
      "$0" stop
      sleep 5
      "$0" start
      ;;
   status-localmaster)
      retval=0
      javaCommandLineKeyword="org.apache.spark.deploy.master.Master"
      pidFile=$pidDir/spark-spark-org.apache.spark.deploy.master.Master-1.pid
      if getServicePid; then
         echo "Master process seems to be running with pid $servicePid"
      else
         echo "Master process seems to be stopped"
         retval=1
      fi
      exit $retval
      ;;
   status-localworkers)
      retval=0
      workerPidFileSeen=0
      javaCommandLineKeyword="org.apache.spark.deploy.worker.Worker"
      # Glob instead of parsing ls; an unmatched pattern stays literal, so
      # skip entries that do not exist.
      for pidFile in "$pidDir"/spark-spark-org.apache.spark.deploy.worker.Worker-*; do
         [ -e "$pidFile" ] || continue
         workerPidFileSeen=1
         if getServicePid; then
            echo "Worker process seems to be running with pid $servicePid"
         else
            echo "Worker process seems to be stopped"
            retval=1
         fi
      done
      # No pid file at all means no worker was started: report it as stopped
      # instead of silently returning success.
      if [ "$workerPidFileSeen" -eq 0 ]; then
         echo "Worker process seems to be stopped"
         retval=1
      fi
      exit $retval
      ;;
   *)
      echo "Usage: $(basename "$0") start|stop|restart|reload|start-master|stop-master|start-slaves|stop-slaves|status-localmaster|status-localworkers"
      exit 1
      ;;
esac

exit 0
EOF
chmod 755 /etc/init.d/spark
# Register the service with chkconfig but keep it disabled for now.
chkconfig --add spark
chkconfig spark off
# Adjust the packaged Spark configuration under /etc/sds/spark.
# Relies on $ipaddress having been set earlier in this script.
spark_conf_dir=/etc/sds/spark

# List 127.0.0.1 in the slaves file. Append only when the exact line is not
# already present, so repeated provisioning does not add duplicate entries.
# (grep's error for a missing file is suppressed; the append creates it.)
grep -qxF '127.0.0.1' "$spark_conf_dir/slaves" 2>/dev/null \
  || echo "127.0.0.1" >> "$spark_conf_dir/slaves"

# Point spark.master at this host's standalone master on port 7077.
sed -i "s|^\(spark.master\).*\$|\1         spark://$ipaddress:7077|" \
  "$spark_conf_dir/spark-defaults.conf"

# spark-env.sh, in one pass (expressions are applied in order to each line):
#  - blank out the MESOS_NATIVE_LIBRARY, MASTER and commented
#    SPARK_EXECUTOR_URI lines
#  - change the worker memory from "4g" to "2g" and the worker cores from 2 to 1
#  - uncomment SPARK_MASTER_IP (inserting this host's address right after the
#    "="), SPARK_EXECUTOR_MEMORY and SPARK_DRIVER_MEMORY
sed -i \
  -e 's/^export MESOS_NATIVE_LIBRARY=.*$//' \
  -e 's/^export MASTER=.*$//' \
  -e 's/^#export SPARK_EXECUTOR_URI=.*$//' \
  -e 's/\(SPARK_WORKER_MEMORY=\)"4g"/\1"2g"/' \
  -e 's/\(SPARK_WORKER_CORES\)=2/\1=1/' \
  -e "s/^#\(SPARK_MASTER_IP=\)/\1$ipaddress/" \
  -e 's/^#\(SPARK_EXECUTOR_MEMORY=\)/\1/' \
  -e 's/^#\(SPARK_DRIVER_MEMORY=\)/\1/' \
  "$spark_conf_dir/spark-env.sh"


##############
## SERVICES ##
##############
# printf instead of `echo -e`: echo options are undefined under POSIX sh.
printf '%s\n' 'Starting Stratio Deep services...'
# Enable both services at boot.
for svc in cassandra spark; do
  chkconfig "$svc" on
done

service cassandra restart
#service mongod restart
#service elasticsearch restart
# Fixed 10-second pause before restarting Spark.
# NOTE(review): presumably gives Cassandra time to come up - confirm.
sleep 10
service spark restart

##############
## DATASET  ##
##############
# Pre-create a world-writable log file at /tmp/deep.log.
# NOTE(review): the consumer of this file is not visible in this script.
touch /tmp/deep.log
chmod 666 /tmp/deep.log

dataset_dir='/opt/sds/dataset'
# -p: do not fail when the directory already exists from a previous run.
mkdir -p "$dataset_dir"
# Stop if the directory cannot be entered; otherwise the downloads below
# would land in whatever the current directory happens to be.
cd "$dataset_dir" || exit 1
# -N (timestamping) refreshes the local copy in place rather than piling up
# "file.1", "file.2", ... on repeated runs.
wget -N http://docs.openstratio.org/resources/datasets/crawler-Page.tgz
# Fetch the raw files: the github.com ".../blob/..." URLs return the HTML
# viewer page, not the .cql content.
wget -N https://raw.githubusercontent.com/Stratio/deep-spark/develop/doc/resources/table-Page-create.cql
wget -N https://raw.githubusercontent.com/Stratio/deep-spark/develop/doc/resources/table-listdomains-create.cql
# Write the CQL schema for the "crawler" keyspace and its two tables.
# The delimiter is quoted so the text is written verbatim, and every CREATE
# uses IF NOT EXISTS so that applying the file again on re-provisioning does
# not fail with "already exists" errors.
cat >/opt/sds/dataset/crawler.cql <<'EOF'
CREATE KEYSPACE IF NOT EXISTS crawler WITH replication = {
      'class': 'SimpleStrategy',
      'replication_factor': '1'
};

use crawler;

CREATE TABLE IF NOT EXISTS "Page" (
 key text,
 "___class" text,
 charset text,
 content text,
 "domainName" text,
 "downloadTime" bigint,
 "enqueuedForTransforming" bigint,
 etag text,
 "firstDownloadTime" bigint,
 "lastModified" text,
 "responseCode" varint,
 "responseTime" bigint,
 "timeTransformed" bigint,
 title text,
 url text,
 PRIMARY KEY (key)
) WITH
 bloom_filter_fp_chance=0.010000 AND
 caching='KEYS_ONLY' AND
 comment='' AND
 dclocal_read_repair_chance=0.000000 AND
 gc_grace_seconds=864000 AND
 index_interval=128 AND
 read_repair_chance=0.100000 AND
 replicate_on_write='true' AND
 populate_io_cache_on_flush='false' AND
 default_time_to_live=0 AND
 speculative_retry='99.0PERCENTILE' AND
 memtable_flush_period_in_ms=0 AND
 compaction={'class': 'SizeTieredCompactionStrategy'} AND
 compression={'sstable_compression': 'LZ4Compressor'};

CREATE TABLE IF NOT EXISTS listdomains (
 domain text,
 num_pages int,
 PRIMARY KEY (domain)
) WITH
 bloom_filter_fp_chance=0.010000 AND
 caching='KEYS_ONLY' AND
 comment='' AND
 dclocal_read_repair_chance=0.000000 AND
 gc_grace_seconds=864000 AND
 index_interval=128 AND
 read_repair_chance=0.100000 AND
 replicate_on_write='true' AND
 populate_io_cache_on_flush='false' AND
 default_time_to_live=0 AND
 speculative_retry='99.0PERCENTILE' AND
 memtable_flush_period_in_ms=0 AND
 compaction={'class': 'SizeTieredCompactionStrategy'} AND
 compression={'sstable_compression': 'SnappyCompressor'};

EOF

# Apply the schema file with cqlsh, then unpack the downloaded tarball and
# stream its crawler/Page/ directory into the Cassandra node on localhost.
/opt/sds/cassandra/bin/cqlsh -f /opt/sds/dataset/crawler.cql
# Stop if the dataset directory is missing: the relative paths below would
# otherwise be resolved against an unrelated working directory.
cd /opt/sds/dataset || exit 1
tar -zxvf crawler-Page.tgz
/opt/sds/cassandra/bin/sstableloader -d localhost crawler/Page/